In addition to the RL task this dataset contains self-reported ‘real-world’ risk behaviors, several impultivity surveys and BART. One question of interest was whether any of the measures predicted the ‘real-world’ risk behaviors. This report looks into that question.

DV: Real-world risk dimensions

Real-world risk was measures using two questionnaires with a total of 63 items. Not all of these had sufficient variance to be useful so we filtered out those that could not be transformed successfully to have skewness <2 and applied hierarchical clustering on the remaining items. This resulted in three factors: Alcohol, smoking, social risk taking.

Here is the list of items that were skewed and could not transformed successfully.

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Using transformation:log
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
45 data positively skewed data were transformed:
carepf1
carepf2
carepf3
carepf4
carepf5
carepf6
carepf8
carepf9
carepf10
carepf11
carepf12
carepf13
carepf14
carepf15
carepf16
carepf17
carepf18
carepf19
carepf20
carepf21
carepf22
carepf23
carepf24
carepf25
carepf26
carepf27
carepf28
carepf29
carepf30
duq3
duq6
duq9
duq10
duq17
duq20
duq21
duq22
duq23
duq24
duq25
duq29
duq30
duq31
duq32
duq33
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Dropping 30 positively skewed data that could not be transformed successfully:
carepf1
carepf3
carepf4
carepf10
carepf11
carepf12
carepf14
carepf16
carepf19
carepf21
carepf22
carepf23
carepf24
carepf25
carepf27
carepf29
duq3
duq6
duq9
duq17
duq21
duq22
duq23
duq24
duq25
duq29
duq30
duq31
duq32
duq33
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
No negatively skewed variables found.* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

Here is the tree structure of the remaining items

library(ggdendro)

try$labels = gsub(".logTr","",try$labels)
try$labels = gsub("_.*","",try$labels)

input_path = '/Users/zeynepenkavi/Dropbox/PoldrackLab/DevStudy_Analyses/input/'
survey_questions = read.csv(paste0(input_path,"survey_questions.csv"))

tmp = data.frame(label=try$labels) %>%
  left_join(survey_questions, by="label")

tmp$question = gsub("\\.", " ", tmp$question)
tmp$question = gsub("^\\d+|\\d+$", "", tmp$question) 
try$labels = tmp$question
# ggdendrogram(try, rotate=TRUE)+
#   theme(panel.border = element_blank())

p = segment(dendro_data(try)) %>%
  mutate(seg_col = ifelse(yend<1.76 & xend < 8, 1, ifelse(yend<1.76 & xend>7 & xend<18, 2, ifelse(yend<1.76 & xend>17 & xend < 22, 3, ifelse(yend<1.76 & xend>21,4,5))))) %>%
  ggplot(aes(x = label))+
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend, col=factor(seg_col)),alpha = 1, size=1.5)+
  theme(panel.border = element_blank(),
        panel.grid = element_blank(), 
        legend.position = "none",
        axis.text.y = element_text(size=14))+
  scale_x_discrete(limits = (label(dendro_data(try))$label))+
  scale_color_manual(values = c(cbbPalette[1:4], "grey"))+
  coord_flip()+
  xlab("")+
  ylab("")

ggsave("Risk_tree.jpeg", device = "jpeg", path = fig_path, width = 12, height = 10, units = "in", dpi = 450)

The code for factorizing the risk items can be found here

The four ‘real-world risk’ factors are distributed as follow

q_data %>%
  select(contains("scores")) %>%
  gather(key, value) %>%
  ggplot(aes(value))+
  geom_histogram()+
  facet_wrap(~key, scales = "free")

IV: All behavioral measures

machine_game_summary = machine_game_data_clean %>%
  group_by(Sub_id, facet_labels) %>%
  summarise(sum_correct = sum(correct1_incorrect0),
            mg_mean_rt = mean(Reaction_time, na.rm=T),
            mg_sd_rt = sd(Reaction_time)) %>%
  gather(key, value, -Sub_id, -facet_labels) %>%
  unite(temp, facet_labels, key) %>%
  spread(temp, value)

all_behavioral_data = machine_game_summary %>%
  left_join(bart_adjusted_pumps %>% select(-age_group), by="Sub_id") %>%
  left_join(q_data %>% rename(Sub_id = id), by="Sub_id") %>%
  ungroup()%>%
  select(-Sub_id)

all_behavioral_data

Calculating relationships between all behavioral measures controlling for age and intelligence

cor_df = data.frame(var1=NA, var2=NA, b_x=NA, p_x=NA, b_age = NA, p_age = NA, b_vocab_raw=NA, p_vocab_raw=NA, b_mr_raw=NA, p_mr_raw=NA)

dvs = names(all_behavioral_data %>% select(-calc_age, -vocab_raw, -mr_raw, -gender))
for(i in 1:(length(dvs)-1)){
  x = all_behavioral_data %>% pull(dvs[i])
  rem_dvs = dvs[-c(1:i)]
  
  for(j in 1:length(rem_dvs)){
    y = all_behavioral_data %>% pull(rem_dvs[j])
    
    m = lm(y ~ x + all_behavioral_data$calc_age+ all_behavioral_data$vocab_raw+ all_behavioral_data$mr_raw)
    
    cor_df = rbind(cor_df, c(var1=dvs[i], 
                             var2=rem_dvs[j], 
                             b_x=coefficients(m)["x"], 
                             p_x= coef(summary(m))["x","Pr(>|t|)"], 
                             b_age=coefficients(m)["all_behavioral_data$calc_age"], 
                             p_age= coef(summary(m))["all_behavioral_data$calc_age","Pr(>|t|)"], 
                          b_vocab_raw=coefficients(m)["all_behavioral_data$vocab_raw"],
                          p_vocab_raw= coef(summary(m))["all_behavioral_data$vocab_raw","Pr(>|t|)"],
                          b_mr_raw=coefficients(m)["all_behavioral_data$mr_raw"],
                          p_mr_raw= coef(summary(m))["all_behavioral_data$mr_raw","Pr(>|t|)"]))
  }
}
rm(dvs, x, y, m, i, j)

All correlations

cor_df = cor_df %>%
  filter(!is.na(var1)) %>%
  mutate(b_x = as.numeric(b_x),
         p_x = as.numeric(p_x),
         b_age = as.numeric(b_age),
         p_age = as.numeric(p_age),
         b_vocab_raw = as.numeric(b_vocab_raw),
         p_vocab_raw = as.numeric(p_vocab_raw),
         b_mr_raw = as.numeric(b_mr_raw),
         p_mr_raw = as.numeric(p_mr_raw))

cor_df %>%
  arrange(p_x) %>%
  datatable() %>%
  formatRound(columns=c('b_x', 'p_x', 'b_age', 'p_age', 'b_vocab_raw', 'p_vocab_raw', 'b_mr_raw', 'p_mr_raw'), digits=3)

All “significant” correlations

cor_df %>%
  filter(p_x<0.05)%>%
  arrange(p_x) %>%
  datatable() %>%
  formatRound(columns=c('b_x', 'p_x', 'b_age', 'p_age', 'b_vocab_raw', 'p_vocab_raw', 'b_mr_raw', 'p_mr_raw'), digits=3)

All “significant” correlations relating to “real-world risky behavior”

cor_df %>%
  filter(p_x<0.05)%>%
  filter(grepl("scores", var1) | grepl("scores", var2)) %>%
  arrange(p_x) %>%
  datatable() %>%
  formatRound(columns=c('b_x', 'p_x', 'b_age', 'p_age', 'b_vocab_raw', 'p_vocab_raw', 'b_mr_raw', 'p_mr_raw'), digits=3)

Significant correlations that survive multiple comparisons

cor_df%>%
  mutate(adj_p_x = p.adjust(p_x, method="fdr"))%>%
  filter(adj_p_x<0.05) %>%
  filter(grepl("scores", var1) | grepl("scores", var2)) %>%
  arrange(adj_p_x) %>%
  datatable() %>%
  formatRound(columns=c('b_x', 'p_x', 'b_age', 'p_age', 'b_vocab_raw', 'p_vocab_raw', 'b_mr_raw', 'p_mr_raw', 'adj_p_x'), digits=3)

Possible relationship between experimental measures and real-world behavior

p = all_behavioral_data %>%
  select(bart_mean_rt, '-5,+495_sum_correct', contains("scores")) %>%
  gather(key, value, -smoking_scores, -alcohol_scores, -rec_scores, -work_scores) %>% 
  gather(score_type, score_value, -key, -value) %>%
  mutate(key = ifelse(key == '-5,+495_sum_correct', 'High var + EV machine total correct', ifelse(key == "bart_mean_rt", "BART Mean RT", NA))) %>%
  ggplot(aes(value, score_value))+
  geom_point()+
  geom_smooth(method="lm")+
  facet_grid(score_type~key, scales="free")+
  ylab("")+
  xlab("")+
  theme(strip.text = element_text(size=14))

ggsave("Risk_cors.jpeg", device = "jpeg", path = fig_path, width = 7, height = 7, units = "in", dpi = 450)

---
title: "Developmental differences learning from large lossses"
output: 
html_document:
toc: true
toc_depts: 2
---

In addition to the RL task this dataset contains self-reported 'real-world' risk behaviors, several impultivity surveys and BART. One question of interest was whether any of the measures predicted the 'real-world' risk behaviors. This report looks into that question.

## DV: Real-world risk dimensions

Real-world risk was measures using two questionnaires with a total of 63 items. Not all of these had sufficient variance to be useful so we filtered out those that could not be transformed successfully to have skewness <2 and applied hierarchical clustering on the remaining items. This resulted in three factors: Alcohol, smoking, social risk taking.

Here is the list of items that were skewed and could not transformed successfully.

```{r echo=FALSE, message=FALSE, warning=FALSE}
source('/Users/zeynepenkavi/Dropbox/PoldrackLab/DevStudy_Analyses/code/workspace_scripts/DevStudy_workspace.R')
```

Here is the tree structure of the remaining items

```{r warning=FALSE, message=FALSE}
library(ggdendro)

try$labels = gsub(".logTr","",try$labels)
try$labels = gsub("_.*","",try$labels)

input_path = '/Users/zeynepenkavi/Dropbox/PoldrackLab/DevStudy_Analyses/input/'
survey_questions = read.csv(paste0(input_path,"survey_questions.csv"))

tmp = data.frame(label=try$labels) %>%
  left_join(survey_questions, by="label")

tmp$question = gsub("\\.", " ", tmp$question)
tmp$question = gsub("^\\d+|\\d+$", "", tmp$question) 
try$labels = tmp$question
```

```{r message=FALSE, warning=FALSE}
# ggdendrogram(try, rotate=TRUE)+
#   theme(panel.border = element_blank())

p = segment(dendro_data(try)) %>%
  mutate(seg_col = ifelse(yend<1.76 & xend < 8, 1, ifelse(yend<1.76 & xend>7 & xend<18, 2, ifelse(yend<1.76 & xend>17 & xend < 22, 3, ifelse(yend<1.76 & xend>21,4,5))))) %>%
  ggplot(aes(x = label))+
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend, col=factor(seg_col)),alpha = 1, size=1.5)+
  theme(panel.border = element_blank(),
        panel.grid = element_blank(), 
        legend.position = "none",
        axis.text.y = element_text(size=14))+
  scale_x_discrete(limits = (label(dendro_data(try))$label))+
  scale_color_manual(values = c(cbbPalette[1:4], "grey"))+
  coord_flip()+
  xlab("")+
  ylab("")

ggsave("Risk_tree.jpeg", device = "jpeg", path = fig_path, width = 12, height = 10, units = "in", dpi = 450)
```

```{r echo=FALSE, out.width='100%'}
fig_name = 'Risk_tree.jpeg'

knitr::include_graphics(paste0(fig_path, fig_name))
```

The code for factorizing the risk items can be found [here](https://github.com/zenkavi/DevStudy_Analyses/blob/master/code/workspace_scripts/questionnaire_data.R)

The four 'real-world risk' factors are distributed as follow

```{r message=FALSE, warning=FALSE}
q_data %>%
  select(contains("scores")) %>%
  gather(key, value) %>%
  ggplot(aes(value))+
  geom_histogram()+
  facet_wrap(~key, scales = "free")
```

## IV: All behavioral measures

```{r}
machine_game_summary = machine_game_data_clean %>%
  group_by(Sub_id, facet_labels) %>%
  summarise(sum_correct = sum(correct1_incorrect0),
            mg_mean_rt = mean(Reaction_time, na.rm=T),
            mg_sd_rt = sd(Reaction_time)) %>%
  gather(key, value, -Sub_id, -facet_labels) %>%
  unite(temp, facet_labels, key) %>%
  spread(temp, value)

all_behavioral_data = machine_game_summary %>%
  left_join(bart_adjusted_pumps %>% select(-age_group), by="Sub_id") %>%
  left_join(q_data %>% rename(Sub_id = id), by="Sub_id") %>%
  ungroup()%>%
  select(-Sub_id)

all_behavioral_data
```

Calculating relationships between all behavioral measures controlling for age and intelligence

```{r warning=FALSE, message=FALSE}
cor_df = data.frame(var1=NA, var2=NA, b_x=NA, p_x=NA, b_age = NA, p_age = NA, b_vocab_raw=NA, p_vocab_raw=NA, b_mr_raw=NA, p_mr_raw=NA)

dvs = names(all_behavioral_data %>% select(-calc_age, -vocab_raw, -mr_raw, -gender))
for(i in 1:(length(dvs)-1)){
  x = all_behavioral_data %>% pull(dvs[i])
  rem_dvs = dvs[-c(1:i)]
  
  for(j in 1:length(rem_dvs)){
    y = all_behavioral_data %>% pull(rem_dvs[j])
    
    m = lm(y ~ x + all_behavioral_data$calc_age+ all_behavioral_data$vocab_raw+ all_behavioral_data$mr_raw)
    
    cor_df = rbind(cor_df, c(var1=dvs[i], 
                             var2=rem_dvs[j], 
                             b_x=coefficients(m)["x"], 
                             p_x= coef(summary(m))["x","Pr(>|t|)"], 
                             b_age=coefficients(m)["all_behavioral_data$calc_age"], 
                             p_age= coef(summary(m))["all_behavioral_data$calc_age","Pr(>|t|)"], 
                          b_vocab_raw=coefficients(m)["all_behavioral_data$vocab_raw"],
                          p_vocab_raw= coef(summary(m))["all_behavioral_data$vocab_raw","Pr(>|t|)"],
                          b_mr_raw=coefficients(m)["all_behavioral_data$mr_raw"],
                          p_mr_raw= coef(summary(m))["all_behavioral_data$mr_raw","Pr(>|t|)"]))
  }
}
rm(dvs, x, y, m, i, j)
```

## All correlations

```{r}
cor_df = cor_df %>%
  filter(!is.na(var1)) %>%
  mutate(b_x = as.numeric(b_x),
         p_x = as.numeric(p_x),
         b_age = as.numeric(b_age),
         p_age = as.numeric(p_age),
         b_vocab_raw = as.numeric(b_vocab_raw),
         p_vocab_raw = as.numeric(p_vocab_raw),
         b_mr_raw = as.numeric(b_mr_raw),
         p_mr_raw = as.numeric(p_mr_raw))

cor_df %>%
  arrange(p_x) %>%
  datatable() %>%
  formatRound(columns=c('b_x', 'p_x', 'b_age', 'p_age', 'b_vocab_raw', 'p_vocab_raw', 'b_mr_raw', 'p_mr_raw'), digits=3)
```

### All "significant" correlations

```{r}
cor_df %>%
  filter(p_x<0.05)%>%
  arrange(p_x) %>%
  datatable() %>%
  formatRound(columns=c('b_x', 'p_x', 'b_age', 'p_age', 'b_vocab_raw', 'p_vocab_raw', 'b_mr_raw', 'p_mr_raw'), digits=3)
```

### All "significant" correlations relating to "real-world risky behavior"

```{r}
cor_df %>%
  filter(p_x<0.05)%>%
  filter(grepl("scores", var1) | grepl("scores", var2)) %>%
  arrange(p_x) %>%
  datatable() %>%
  formatRound(columns=c('b_x', 'p_x', 'b_age', 'p_age', 'b_vocab_raw', 'p_vocab_raw', 'b_mr_raw', 'p_mr_raw'), digits=3)
```

### Significant correlations that survive multiple comparisons

```{r}
cor_df%>%
  mutate(adj_p_x = p.adjust(p_x, method="fdr"))%>%
  filter(adj_p_x<0.05) %>%
  filter(grepl("scores", var1) | grepl("scores", var2)) %>%
  arrange(adj_p_x) %>%
  datatable() %>%
  formatRound(columns=c('b_x', 'p_x', 'b_age', 'p_age', 'b_vocab_raw', 'p_vocab_raw', 'b_mr_raw', 'p_mr_raw', 'adj_p_x'), digits=3)
```

### Possible relationship between experimental measures and real-world behavior

```{r warning=FALSE, message=FALSE}
p = all_behavioral_data %>%
  select(bart_mean_rt, '-5,+495_sum_correct', contains("scores")) %>%
  gather(key, value, -smoking_scores, -alcohol_scores, -rec_scores, -work_scores) %>% 
  gather(score_type, score_value, -key, -value) %>%
  mutate(key = ifelse(key == '-5,+495_sum_correct', 'High var + EV machine total correct', ifelse(key == "bart_mean_rt", "BART Mean RT", NA))) %>%
  ggplot(aes(value, score_value))+
  geom_point()+
  geom_smooth(method="lm")+
  facet_grid(score_type~key, scales="free")+
  ylab("")+
  xlab("")+
  theme(strip.text = element_text(size=14))

ggsave("Risk_cors.jpeg", device = "jpeg", path = fig_path, width = 7, height = 7, units = "in", dpi = 450)

```

```{r echo=FALSE, out.width='100%'}
fig_name = 'Risk_cors.jpeg'

knitr::include_graphics(paste0(fig_path, fig_name))
```